County Level Correction

County Level Analysis for MA

library(tidyverse)
library(latex2exp)
library(patchwork)

Base Functions

source(paste0(here::here(), "/analysis/base_functions/base_functions.R"))
get_melded <- function(alpha_mean = 0.9,
                       alpha_sd = 0.04,
                       alpha_bounds = NA,
                       beta_mean = .15,
                       beta_sd =.09,
                       beta_bounds = NA,
                       s_untested_mean = .025,
                       s_untested_sd = .0225,
                       s_untested_bounds = NA,
                       p_s0_pos_mean = .4,
                       p_s0_pos_sd = .1225,
                       p_s0_pos_bounds = NA,
                       nsamp = 1e3) {

  given_args <- as.list(environment())
  # cat("Arguments to get_melded:\n")
  # print(given_args)

    theta <- tibble(alpha = sample_gamma_density(nsamp,
                                                mean = alpha_mean,
                                                sd = alpha_sd,
                                                bounds = alpha_bounds),
                    beta= sample_beta_density(nsamp,
                                              mean = beta_mean,
                                              sd = beta_sd,
                                              bounds = beta_bounds),
                    P_S_untested = sample_beta_density(nsamp,
                                                       mean = s_untested_mean,
                                                       sd = s_untested_sd,
                                                       bounds = s_untested_bounds)) %>%
        mutate(phi_induced = est_P_A_testpos(P_S_untested = P_S_untested,
                                             alpha = alpha,
                                             beta=beta))
    
   # message(paste0("nrows of theta: ", nrow(theta)))

    # theta contains values sampled from alpha, beta, P_S_untested, and M(theta) = phi_induced
    # induced phi
    phi <- theta$phi_induced

    # approximate induced distribution via a density approximation
    phi_induced_density <- density(x = phi, n = nsamp, adjust = 2, kernel = "gaussian")

    indexes <- findInterval(phi, phi_induced_density$x)

    phi_sampled_density <- phi_induced_density$y[indexes]

    dp_s0_pos <- function(x) {

      beta_density(x,
                   mean=p_s0_pos_mean,
                   sd = p_s0_pos_sd,
                   bounds=p_s0_pos_bounds)
    }

    weights <- (phi_sampled_density/ dp_s0_pos(phi))^(.5)

    post_samp_ind <-sample.int(n=nsamp,
                               size=nsamp,
                               prob=1/weights,
                               replace=TRUE)

    post_melding <- bind_cols(theta[post_samp_ind,],
                     P_A_testpos =  phi[post_samp_ind]) %>%
        select(-phi_induced)

     return(list(post_melding = post_melding, pre_melding = theta))
  #  return(post_melding)
}


#' reformat for plot generation
reformat_melded <- function(melded_df,
                            theta_df,
                            nsamp,
                            p_s0_pos_mean,
                            p_s0_pos_sd,
                            p_s0_pos_bounds) {

  melded_df_long <- melded_df %>%
    pivot_longer(cols=everything()) %>%
    mutate(type = "After Melding")


  melded <- theta_df %>%
    mutate(P_A_testpos = sample_beta_density(nsamp,
                                             mean = p_s0_pos_mean,
                                             sd = p_s0_pos_sd,
                                             bounds = p_s0_pos_bounds)) %>%
    pivot_longer(cols=everything()) %>%
    mutate(type = ifelse(
      name == "phi_induced",
      "Induced", "Before Melding")) %>%
    mutate(name = ifelse(name == "phi_induced",
                         "P_A_testpos",
                         name)) %>%
    bind_rows(melded_df_long) %>%
    mutate(name = case_when(
      name == "alpha" ~"$\\alpha$",
      name == "beta" ~"$\\beta$",
      name == "P_A_testpos" ~ "$P(S_0|test+,untested)$",
      name == "P_S_untested" ~ "$P(S_1|untested)$")
    ) %>%
    mutate(name = factor(name,
                         levels = c(
                           "$\\alpha$",
                           "$\\beta$",
                           "$P(S_1|untested)$",
                           "$P(S_0|test+,untested)$")))

}



plot_melded <- function(melded, custom_title="", nsamp) {
  
  
  p1 <- melded %>%
    filter(name != "$P(S_0|test+,untested)$") %>%
    ggplot(aes(x = value, fill = type)) +
    geom_density(alpha = .5, show.legend=FALSE) +
    facet_wrap(~name,
               labeller = as_labeller(
                 TeX,   default = label_parsed),
               ncol = 3,
               scales = "fixed") +
    theme_bw() +
    theme(
          # axis.text.y = element_blank(),
          # axis.ticks.y = element_blank(),
          axis.title = element_text(size = 18),
          axis.text.x = element_text(size = 10),
          plot.title =element_text(size = 18,
                                   margin =margin(0,0, .5,0, 'cm')),
          strip.text = element_text(size = 16),
          legend.text = element_text(size = 16)) +
    labs(title = TeX(custom_title,bold=TRUE),
         subtitle =paste0("Number of Samples: ", nsamp),
         fill = "",
         y = "Density") +
    scale_fill_manual(values = c("#5670BF", "#418F6A","#B28542")) +
    guides(fill = guide_legend(keyheight = 2,  keywidth = 2))
  
  p2 <- melded %>%
    filter(name == "$P(S_0|test+,untested)$") %>%
    ggplot(aes(x = value, fill = type)) +
    geom_density(alpha = .5) +
    facet_wrap(~name,
               labeller = as_labeller(
                 TeX,   default = label_parsed),
               ncol = 3,
               scales = "fixed") +
    theme_bw() +
    theme(
          # axis.text.y = element_blank(),
          # axis.ticks.y = element_blank(),
          axis.title = element_text(size = 18),
          axis.text.x = element_text(size = 10),
          plot.title =element_text(size = 18,
                                   margin =margin(0,0, .5,0, 'cm')),
          strip.text = element_text(size = 16),
          legend.text = element_text(size = 18)) +
    labs(
      #title = paste0("Number of Samples: ", nsamp),
         fill = "",
         y = "Density") +
    scale_fill_manual(values = c("#5670BF", "#418F6A","#B28542")) +
    guides(fill = guide_legend(keyheight = 2,  keywidth = 2)) +
    xlim(0,1)
  
  
  p1 / p2 +  plot_layout(nrow =2, widths = c(4,1))
  
}



plot_melded <- function(melded, custom_title="", nsamp) {
  
  
  p1 <- melded %>%
    filter(name != "$P(S_0|test+,untested)$") %>%
    ggplot(aes(x = value, fill = type)) +
    geom_density(alpha = .5, show.legend=FALSE) +
    facet_wrap(~name,
               labeller = as_labeller(
                 TeX,   default = label_parsed),
               ncol = 3,
               scales = "fixed") +
    theme_bw() +
    theme(
          # axis.text.y = element_blank(),
          # axis.ticks.y = element_blank(),
          axis.title = element_text(size = 18),
          axis.text.x = element_text(size = 10),
          plot.title =element_text(size = 18,
                                   margin =margin(0,0, .5,0, 'cm')),
          strip.text = element_text(size = 16),
          legend.text = element_text(size = 16)) +
    labs(title = TeX(custom_title,bold=TRUE),
         subtitle =paste0("Number of Samples: ", nsamp),
         fill = "",
         y = "Density") +
    scale_fill_manual(values = c("#5670BF", "#418F6A","#B28542")) +
    guides(fill = guide_legend(keyheight = 2,  keywidth = 2))
  
  p2 <- melded %>%
    filter(name == "$P(S_0|test+,untested)$") %>%
    ggplot(aes(x = value, fill = type)) +
    geom_density(alpha = .5) +
    facet_wrap(~name,
               labeller = as_labeller(
                 TeX,   default = label_parsed),
               ncol = 3,
               scales = "fixed") +
    theme_bw() +
    theme(
          # axis.text.y = element_blank(),
          # axis.ticks.y = element_blank(),
          axis.title = element_text(size = 18),
          axis.text.x = element_text(size = 10),
          plot.title =element_text(size = 18,
                                   margin =margin(0,0, .5,0, 'cm')),
          strip.text = element_text(size = 16),
          legend.text = element_text(size = 18)) +
    labs(
      #title = paste0("Number of Samples: ", nsamp),
         fill = "",
         y = "Density") +
    scale_fill_manual(values = c("#5670BF", "#418F6A","#B28542")) +
    guides(fill = guide_legend(keyheight = 2,  keywidth = 2)) +
    xlim(0,1)
  
  
  p1 / p2 +  plot_layout(nrow =2, widths = c(4,1))
  
}

Initial Setup

Running analysis for ma.

Set Prior Parameters

# include slow plots comparing melded distributions for modified priors to original melded distribution
include_slow <- TRUE
# only include subset of rows for testing 
testing <- TRUE
# whether to save results adj_v*.RDS (if testing, don't save results)
save <- FALSE

set.seed(123)


prior_params <- list(
  alpha_mean = .95,
  alpha_sd = 0.08,
  alpha_bounds = NA,
 # alpha_bounds = c(.8,1),
  beta_mean = .15,
  beta_sd =.09,
  beta_bounds = NA,
#  beta_bounds = c(0.002, 0.4),
  s_untested_mean = .03,
  s_untested_sd = .0225,
#  s_untested_bounds = c(0.0018, Inf),
  s_untested_bounds = NA,
  p_s0_pos_mean = .4,
  p_s0_pos_sd = .1225,
 p_s0_pos_bounds = NA,
#  p_s0_pos_bounds = c(.25, .7),
  nsamp = 1e5)

corrected_sample_reps <- 1e3

# relevant for versions 2,3
beta_smoothing_span <- .1
# relevant for versions 3,4
s_untested_smoothing_span <- .2

Read Data

state_name <- params$state
# for running when NOT knitting
if(!exists("state_name")) state_name <- "ma"


state_data_path <- paste0(here::here(), 
                          "/data/county_level/",
                          state_name, 
                          "/",
                          state_name, "_county_biweekly.RDS")


results_path <-  paste0(
  here::here(), 
  "/analysis/results/adj_biweekly_county/",
  state_name, "/")



# objects before this point should not be removed 
# when cleaning environment before running each version
do_not_remove <- c(ls(), "do_not_remove")



covid_county <- readRDS(state_data_path)
# full descriptions by version
versions <- tibble(
  version = c("v1", "v2", "v3", "v4"),
  vlabel = c("Priors Do Not Vary by County and Date", 
             "Distribution for Beta is Centered at Empirical Value",
             "Distributions for P(S_1|untested) and Beta Centered at Empirical Values",
             "Distribution for P(S_1|untested) Centered at Empirical Value")
)

Analysis

Version 1

Priors do not vary by state or date; melding performed once.

priors_version <- "v1"


melded <- do.call(get_melded, prior_params)

saveRDS(melded$post_melding,
        paste0(
  here::here(),
  "/analysis/results/melded/constrained_v1.RDS"))


# only use a few rows if testing
covid_county <- if(testing) covid_county[1:4,] else covid_county

tictoc::tic()
corrected <- pmap_df(
 # covid_county[1:4,],
  covid_county,
  ~ {
    process_priors_per_county(
      priors = melded$post_melding,
      county_df = list(...),
      nsamp = prior_params$nsamp) %>%
      generate_corrected_sample(., num_reps = corrected_sample_reps) %>%
      summarize_corrected_sample()
    })
tictoc::toc()
## 3.857 sec elapsed
saveRDS(corrected,
        paste0(results_path,
        "adj_",
               priors_version, 
               ".RDS"))
title <- paste0("Pre and Post Melding Distributions")
       
melded_long <- reformat_melded(melded_df =  melded$post_melding,
                                      theta_df =  melded$pre_melding,
                                      p_s0_pos_mean = prior_params$p_s0_pos_mean,
                                      p_s0_pos_sd = prior_params$p_s0_pos_sd,
                                      p_s0_pos_bounds = prior_params$p_s0_pos_bounds,
                                      nsamp = prior_params$nsamp)

melded_long %>%
         plot_melded(custom_title = title,
                     nsamp = prior_params$nsamp)

Version 2

Time varying estimate of beta using the state-level estimate.

# remove objects from previous version
remove(list = ls()[!ls() %in% do_not_remove] )

covid_county <- readRDS(state_data_path)


source(paste0(here::here(), "/analysis/base_functions/base_functions.R"))

priors_version <- "v2"

fb_symptoms <- readRDS(
  paste0(
    here::here(), 
    "/data/state_level/screeningpos_all_states.RDS")) %>%
  filter(state == state_name)



beta_prior <-  tibble(value = sample_beta_density(1e5,
                          mean = prior_params$beta_mean,
                          sd = prior_params$beta_sd,
                          bounds = prior_params$beta_bounds),
                      type = "Prior on Beta")
################################################################################################
# compare empirical distribution across all dates to prior on beta 
################################################################################################
fb_symptoms %>% 
  select(signal, date, value, stderr) %>% 
  pivot_wider(names_from = signal,
              values_from = c(value,stderr)) %>%
  mutate(beta_est = value_smoothed_wscreening_tested_positive_14d/
           value_smoothed_wtested_positive_14d)%>%
  select(value = beta_est) %>%
  mutate(type = "Estimate for Beta\n(Screening Test Positivity/Test Positivity)") %>%
  bind_rows(beta_prior) %>%
  ggplot(aes(x=value, fill = type)) +
  geom_density(alpha = .6) +
  theme_bw() +
  labs(title = "Comparing Prior for Beta to Empirical Distribution\n from COVID-19 Trends and Impact Survey Data Across All Dates",
       subtitle = paste0("State: ", toupper(state_name)),
       fill = "") +
  theme_c()

dates <- readRDS(paste0(here::here(), "/data/date_to_biweek.RDS"))

beta_est <- fb_symptoms %>% 
  select(signal, date, value, stderr) %>% 
  pivot_wider(names_from = signal,
              values_from = c(value,stderr)) %>%
  mutate(beta_estimate = value_smoothed_wscreening_tested_positive_14d
         /value_smoothed_wtested_positive_14d) %>%
  arrange(date) %>%
  mutate(index = 1:nrow(.)) %>%
  ungroup() %>%
  # fill last weeks (missing from survey data) with rolling mean from previous
  # 3 observations
  mutate(rolled_mean = RcppRoll::roll_mean(beta_estimate, n = 3, na.rm = FALSE, fill = NA)) %>%
  fill(rolled_mean, .direction = "down") %>%
  mutate(beta_estimate = ifelse(is.na(beta_estimate), rolled_mean, beta_estimate))

smoothed_beta <- loess(beta_estimate~index, data= beta_est, span = beta_smoothing_span)

beta_est <- beta_est %>%
  mutate(beta_estimate_smoothed = predict(smoothed_beta)) 
################################################################################################
# look at empirical beta estimates across time 
################################################################################################
beta_est  %>%
  ggplot(aes(x=date, y = beta_estimate)) +
  geom_line() +
  geom_point(alpha = .5) +
  geom_line(aes(y=beta_estimate_smoothed), color = "darkred", size = 1.2) +
  theme_c() +
  labs(title = "Estimates of Beta across Time",
       subtitle = paste0("State: ", toupper(state_name)),
       y = "Estimate of Beta") +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %d")

################################################################################################
# summarize to one observation per biweek 
################################################################################################
symptoms <- beta_est %>%
  select(date, beta_estimate_smoothed) %>%
  left_join(dates) %>%
  group_by(biweek) %>%
  # get last date since observation for date corresponds to value for previous
  # 2 weeks
  slice_max(n=1, order_by = date) %>%
  ungroup() %>%
  select(-date)
# COMPARE PREMELDING DISTRIUBTIONS

compare_priors <- symptoms %>%
  # only need biweeks in dates data frame
  filter(!is.na(biweek)) %>%
  pmap_df(~ {
    df <- tibble(...)
    #glimpse(df)
    #print(df$beta_estimate_smoothed)
    tibble(empirical = sample_beta_density(
      1e4, 
      mean = df$beta_estimate_smoothed, 
      sd = prior_params$beta_sd),
      original_prior = sample_beta_density(
        1e4,
        mean = prior_params$beta_mean,
        sd = prior_params$beta_sd),
      biweek = df$biweek)
    })


# compare (unconstrained) distributions
compare_priors %>%
  pivot_longer(c(empirical,original_prior), 
               names_to = "Prior") %>%
  mutate(biweek = as.factor(biweek)) %>%
  ggplot(aes(x = value, y=fct_reorder(biweek, 
                                      as.numeric(biweek),
                                      .desc=TRUE), 
             fill = Prior)) +
  ggridges::geom_density_ridges(alpha = .6) +
  labs(y = "Biweek",
       title = "Comparing Distribution Centered\nat Empirical Estimate of Beta\nto Original Prior\n(Not Melded)") +
  theme_c(legend.title = element_text(face="bold", size = 16))
covid_county <- covid_county %>% 
  left_join(symptoms) %>%
  # only have CTIS data starting at week 6
  # filter out the beginning dates where beta_estimate_smoothed is NA
  filter(!is.na(beta_estimate_smoothed))


priors_constrained_by_biweek <- covid_county %>% 
  select(biweek,beta_estimate_smoothed) %>%
  arrange(biweek) %>%
  # there will be more than one observation per county since
  # beta estimates are at the state level
  distinct() %>%
  pmap_df(~ {
    args <- list(...)
   # message(paste0("before: ",prior_params$beta_mean))
    prior_params$beta_mean <- args$beta_estimate_smoothed
   # message(paste0("after: ",prior_params$beta_mean))
    res <- do.call(get_melded, prior_params)
    res$post_melding %>%
      mutate(biweek= args$biweek)
})

Compare Melded Priors to Original Across Time

melded_original <- readRDS(paste0(
  here::here(),
  "/analysis/results/melded/constrained_v1.RDS"))

# plot melded distributions and compare to original
original <- map_df(6:30, ~{ 
  melded_original %>%
    select(beta) %>%
    mutate(source = "original_prior",
           biweek = .x)})
 

priors_constrained_by_biweek %>%
  mutate(source = "melded distribution\ncentered at empirical value") %>%
  select(beta, biweek, source) %>%
  bind_rows(original) %>%
  mutate(biweek = as.factor(biweek)) %>%
  ggplot(aes(x = beta, y=fct_reorder(biweek, 
                                      as.numeric(biweek),
                                      .desc=TRUE), 
             fill = source)) +
  ggridges::geom_density_ridges(alpha = .6) +
  labs(y = "Biweek",
       title = "Comparing Post-melding Distribution Centered\nat Empirical Estimate of Beta\nto Original Prior") +
  theme_c(legend.title = element_text(face="bold", size = 16))

# only use a few rows if testing
covid_county <- if(testing) covid_county[1:4,] else covid_county

tictoc::tic()
corrected <- pmap_df(
 # covid_county[1:4,], 
  covid_county,
  ~ { 
    
    input_df <- list(...)
    message(paste0(input_df$biweek, ", " , input_df$fips))
    process_priors_per_county(
      priors = priors_constrained_by_biweek[priors_constrained_by_biweek$biweek == input_df$biweek,],
      county_df = input_df,
      nsamp = prior_params$nsamp) %>%
      generate_corrected_sample(., 
                                num_reps = corrected_sample_reps) %>%
      summarize_corrected_sample()
                                  })

tictoc::tic()
saveRDS(corrected,
        paste0(results_path,
        "adj_",
               priors_version, 
               ".RDS"))

Version 3

Time varying estimate of beta using the state-level estimate, time-varying estimate of \(P(S_1|untested)\) using the state-level estimate of the percentage of the population experiencing COVID-like illness.

# remove objects from previous version
remove(list = ls()[!ls() %in% do_not_remove] )

covid_county <- readRDS(state_data_path)

dates <- readRDS(paste0(here::here(), "/data/date_to_biweek.RDS"))

source(paste0(here::here(), "/analysis/base_functions/base_functions.R"))

priors_version <- "v3"

fb_symptoms <- readRDS(
  paste0(
    here::here(), 
    "/data/state_level/screeningpos_all_states.RDS")) %>%
  filter(state == state_name)
emp_est <- fb_symptoms %>% 
  select(signal, date, value, state) %>% 
  pivot_wider(names_from = signal,
              values_from = value) %>%
  mutate(
    beta_estimate = smoothed_wscreening_tested_positive_14d/smoothed_wtested_positive_14d,
         s_untested_estimate = smoothed_wcli) %>%
  select(date, beta_estimate, s_untested_estimate) %>%
  arrange(date) %>%
  mutate(index = 1:nrow(.)) %>%
  # fill last weeks (missing from survey data) with rolling mean from previous
  # 3 observations
  mutate(rolled_mean_beta = RcppRoll::roll_mean(beta_estimate, 
                                           n = 3, 
                                           na.rm = FALSE, 
                                           fill = NA)) %>%
  fill(rolled_mean_beta, .direction = "down") %>%
  mutate(beta_estimate = ifelse(is.na(beta_estimate),
                                rolled_mean_beta, 
                                beta_estimate))

smoothed_beta <- loess(beta_estimate~index, data= emp_est, span = beta_smoothing_span)
smoothed_s_untested <-  loess(s_untested_estimate~index, data= emp_est, span = s_untested_smoothing_span)

emp_est <- emp_est %>%
  mutate(beta_estimate_smoothed = predict(smoothed_beta),
         s_untested_smoothed = predict(smoothed_s_untested)) 
# plot beta estimates across time
emp_est %>% 
  ggplot(aes(x=date, y = beta_estimate)) +
  geom_line() +
  geom_point(alpha = .5) +
  geom_line(aes(y=beta_estimate_smoothed), color = "darkred", size = 1.2) +
  theme_c() +
  labs(title = "Estimates of Beta across Time",
       subtitle = paste0("State: ", toupper(state_name)),
       y = "Estimate of Beta")+
  scale_x_date(date_breaks = "2 months", date_labels = "%b %d")

# plot P(S_1|untested) estimates across time
emp_est %>%
  ggplot(aes(x=date, y = s_untested_estimate)) +
  geom_line() +
  geom_point(alpha = .5) +
  geom_line(aes(y=s_untested_smoothed), color = "darkred", size = 1.2) +
  labs(title = TeX("Estimates of $P(S_1|untested)$ Across Time"),
       y = "Percentage COVID-like Illness") +
  theme_c()+
  scale_x_date(date_breaks = "2 months", date_labels = "%b %d")

#############################################################
# summarize to one observation per biweek 
#############################################################
symptoms <- emp_est %>%
  select(date, beta_estimate_smoothed, s_untested_smoothed) %>%
  left_join(dates) %>%
  group_by(biweek) %>%
  # get last date since observation for date corresponds to value for previous
  # 2 weeks
  slice_max(n=1, order_by = date) %>%
  ungroup() %>%
  select(-date)

covid_county <- covid_county %>% 
  left_join(symptoms) %>%
  # only have CTIS data starting at week 6
  # filter out the beginning dates where beta_estimate_smoothed is NA
  filter(!is.na(beta_estimate_smoothed))


priors_constrained_by_biweek <- covid_county %>% 
  select(biweek,beta_estimate_smoothed, s_untested_smoothed) %>%
  arrange(biweek) %>%
  # there will be more than one observation per county since
  # beta estimates are at the state level
  distinct() %>%
  pmap_df(~ {
    args <- list(...)

    # message(paste0("before: beta_mean = ", prior_params$beta_mean),
    #         "\ns_untested_mean = ", prior_params$s_untested_mean)
    
    prior_params$beta_mean <- args$beta_estimate_smoothed
    prior_params$s_untested_mean <- args$s_untested_smoothed
    
    # message(paste0("after: beta_mean = ", prior_params$beta_mean),
    #         "\ns_untested_mean = ", prior_params$s_untested_mean)
    
    res <- do.call(get_melded, prior_params)
    res$post_melding %>%
      mutate(biweek= args$biweek)
  })

Compare Melded Priors to Original Across Time

melded_original <- readRDS(paste0(
  here::here(),
  "/analysis/results/melded/constrained_v1.RDS"))

# plot melded distributions and compare to original
original <- map_df(6:30, ~{ 
  melded_original %>%
    select(beta, P_S_untested) %>%
    mutate(source = "original_prior",
           biweek = .x)}) %>%
  pivot_longer(c(beta,P_S_untested), 
               names_to = "variable")
 

 
priors_constrained_by_biweek %>%
  select(biweek, P_S_untested, beta) %>%
  pivot_longer(c(P_S_untested,beta), names_to = "variable") %>%
  mutate(source = "melded distribution\ncentered at empirical value") %>%
  bind_rows(original) %>%
  mutate(biweek = as.factor(biweek),
         variable = gsub("P_S_untested", "$P(S_1|untested)$", variable),
         variable = gsub("beta", "$\\beta$", variable, fixed = TRUE)) %>%
  ggplot(aes(x = value, y=fct_reorder(biweek, 
                                      as.numeric(biweek),
                                      .desc=TRUE), 
             fill = source)) +
  ggridges::geom_density_ridges(alpha = .6) +
  facet_wrap(~variable, labeller=  as_labeller(TeX,
                                    default = label_parsed)) +
  labs(y = "Biweek",
       title = TeX("Comparing Post-melding Distributions Centered"),
      subtitle = TeX("at Empirical Estimate of $\\beta$ and $P(S_1|untested)$ to Original Prior")) +
  theme_c(legend.title = element_text(face="bold", size = 16),
          plot.title = element_text(size = 20, hjust = .5),
          plot.subtitle = element_text(size = 20, hjust = .5))

rm(original)
rm(melded_original)
# only use a few rows if testing
covid_county <- if(testing) covid_county[1:4,] else covid_county

tictoc::tic()
corrected <- pmap_df(
#  covid_county[1:3,], 
  covid_county,
  ~ { 
    
    input_df <- list(...)
    message(paste0(input_df$biweek, ", " , input_df$fips))
    process_priors_per_county(
      priors = priors_constrained_by_biweek[priors_constrained_by_biweek$biweek == input_df$biweek,],
      county_df = input_df,
      nsamp = prior_params$nsamp) %>%
      generate_corrected_sample(., 
                                num_reps = corrected_sample_reps) %>%
      summarize_corrected_sample()
                                  })

tictoc::tic()
saveRDS(corrected,
        paste0(results_path,
        "adj_",
               priors_version, 
               ".RDS"))

Version 4

Only using time-varying estimate of \(P(S_1|untested)\) using the state-level estimate of the percentage of the population experiencing COVID-like illness.

# remove objects from previous version
remove(list = ls()[!ls() %in% do_not_remove] )

covid_county <- readRDS(state_data_path)

dates <- readRDS(paste0(here::here(), "/data/date_to_biweek.RDS"))

source(paste0(here::here(), "/analysis/base_functions/base_functions.R"))

priors_version <- "v4"

fb_symptoms <- readRDS(
  paste0(
    here::here(), 
    "/data/state_level/screeningpos_all_states.RDS")) %>%
  filter(state == state_name)
emp_est <- fb_symptoms %>% 
  select(signal, date, value, state) %>% 
  pivot_wider(names_from = signal,
              values_from = value) %>%
  mutate( s_untested_estimate = smoothed_wcli) %>%
  select(date, s_untested_estimate) %>%
  arrange(date) %>%
  mutate(index = 1:nrow(.)) 

smoothed_s_untested <-  loess(s_untested_estimate~index, data= emp_est, span = s_untested_smoothing_span)

emp_est <- emp_est %>%
  mutate(s_untested_smoothed = predict(smoothed_s_untested)) 
# plot P(S_1|untested) estimates across time
emp_est %>%
  ggplot(aes(x=date, y = s_untested_estimate)) +
  geom_line() +
  geom_point(alpha = .5) +
  geom_line(aes(y=s_untested_smoothed), color = "darkred", size = 1.2) +
  labs(title = TeX("Estimates of $P(S_1|untested)$ Across Time"),
       y = "Percentage COVID-like Illness") +
  theme_c()+
  scale_x_date(date_breaks = "2 months", date_labels = "%b %d")

#############################################################
# summarize to one observation per biweek 
#############################################################
symptoms <- emp_est %>%
  select(date, s_untested_smoothed) %>%
  left_join(dates) %>%
  group_by(biweek) %>%
  # get last date since observation for date corresponds to value for previous
  # 2 weeks
  slice_max(n=1, order_by = date) %>%
  ungroup() %>%
  select(-date)

covid_county <- covid_county %>% 
  left_join(symptoms) %>%
  # only take biweeks >= 6 since this is where we have CTIS data
  filter(!is.na(s_untested_smoothed))


priors_constrained_by_biweek <- covid_county %>% 
  select(biweek, s_untested_smoothed) %>%
  arrange(biweek) %>%
  # there will be more than one observation per county since
  # beta estimates are at the state level
  distinct() %>%
  pmap_df(~ {
    args <- list(...)

    # message(paste0("before: beta_mean = ", prior_params$beta_mean),
    #         "\ns_untested_mean = ", prior_params$s_untested_mean)
    
    prior_params$beta_mean <- args$beta_estimate_smoothed
    prior_params$s_untested_mean <- args$s_untested_smoothed
    
    # message(paste0("after: beta_mean = ", prior_params$beta_mean),
    #         "\ns_untested_mean = ", prior_params$s_untested_mean)
    
    res <- do.call(get_melded, prior_params)
    res$post_melding %>%
      mutate(biweek= args$biweek)
  })

Compare Melded Priors to Original Across Time

melded_original <- readRDS(paste0(
  here::here(),
  "/analysis/results/melded/constrained_v1.RDS"))

# plot melded distributions and compare to original
original <- map_df(6:30, ~{ 
  melded_original %>%
    select(P_S_untested) %>%
    mutate(source = "original_prior",
           biweek = .x)}) %>%
  pivot_longer(c(P_S_untested), 
               names_to = "variable")
 

 
priors_constrained_by_biweek %>%
  select(biweek, P_S_untested) %>%
  pivot_longer(c(P_S_untested), names_to = "variable") %>%
  mutate(source = "melded distribution\ncentered at empirical value") %>%
  bind_rows(original) %>%
  mutate(biweek = as.factor(biweek),
         variable = gsub("P_S_untested", "$P(S_1|untested)$", variable)) %>%
  ggplot(aes(x = value, y=fct_reorder(biweek, 
                                      as.numeric(biweek),
                                      .desc=TRUE), 
             fill = source)) +
  ggridges::geom_density_ridges(alpha = .6) +
  labs(y = "Biweek",
       title = TeX("Comparing Post-melding Distribution Centered"),
       subtitle =  TeX("at Empirical Estimate of $P(S_1|untested)$ to Original Prior")) +
  theme_c(legend.title = element_text(face="bold", size = 16),
          plot.subtitle = element_text(hjust = .5, size = 20),
          plot.title = element_text(hjust = .5, size = 20)) 

rm(original)
rm(melded_original)
# only use a few rows if testing
covid_county <- if(testing) covid_county[1:4,] else covid_county

tictoc::tic()
corrected <- pmap_df(
#  covid_county[1:3,], 
  covid_county,
  ~ { 
    
    input_df <- list(...)
    message(paste0(input_df$biweek, ", " , input_df$fips))
    process_priors_per_county(
      priors = priors_constrained_by_biweek[priors_constrained_by_biweek$biweek == input_df$biweek,],
      county_df = input_df,
      nsamp = prior_params$nsamp) %>%
      generate_corrected_sample(., 
                                num_reps = corrected_sample_reps) %>%
      summarize_corrected_sample()
                                  })

tictoc::tic()
saveRDS(corrected,
        paste0(results_path,
        "adj_",
               priors_version, 
               ".RDS"))